# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/26 16:29
# @File: qu_10.py

"""
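3. Hot-search analysis by hour of day: number of search records per hour.
Example output, as (hour, count) pairs sorted by count in descending order: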
[('20', 3479), ('23', 3087), ('21', 2989), ('22', 2499), ('01', 1365), ('10', 973), ('11', 875), ('05', 798), ('02', 756), ('19', 735), ('12', 644), ('14', 637), ('00', 504), ('16', 497), ('08', 476), ('04', 476), ('03', 385), ('09', 371), ('15', 350), ('06', 294), ('13', 217), ('18', 112), ('17', 77), ('07', 70)]
"""

from pyspark import SparkConf, SparkContext

def extract_hour(line):
    """Return the text before the first colon of a line's first tab field.

    The line is treated as tab-separated; only the first field is inspected.
    For a field such as ``20:15:01`` this yields ``"20"``. If the field has
    no colon, the whole field is returned unchanged.

    NOTE(review): presumably the first field is an HH:MM:SS timestamp, so the
    result is the hour of day -- confirm against the data file.

    :param line: one raw text line from the log file.
    :return: the leading portion of the first field, up to the first colon.
    """
    first_field = line.split("\t", 1)[0]
    return first_field.partition(":")[0]


if __name__ == '__main__':
    conf = SparkConf().setAppName("a").setMaster("local[*]")
    # Use the context manager so the SparkContext is stopped even when the
    # job raises; the original never called sc.stop().
    with SparkContext(conf=conf) as sc:
        # Alternative relative location of the same data: ../data/SogouQ.txt
        lines = sc.textFile("file:///tmp/pycharm_project_161/data/SogouQ.txt")
        hour_counts = (
            # Skip blank lines so they are not counted under an empty key.
            lines.filter(lambda line: line.strip())
            # Emit (hour, 1) per record, then sum the ones per hour.
            .map(lambda line: (extract_hour(line), 1))
            .reduceByKey(lambda a, b: a + b)
            # Busiest hours first.
            .sortBy(lambda pair: pair[1], ascending=False)
        )
        print(hour_counts.collect())